import urllib
import urllib2
import re
page = 2
url = 'http://www.qiushibaike.com/text/page/' + str(page)
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
header={'User-Agent':user_agent}
try:
    request=urllib2.Request(url,headers=header)
    response=urllib2.urlopen(request)
    content = response.read().decode('utf-8')
    print content
    pattern = re.compile('<div.*?author">.*?<a.*?<img.*?>(.*?)</a>.*?<div.*?' +
                         'content">(.*?)<!--(.*?)-->.*?</div>(.*?)<div class="stats.*?class="number">(.*?)</i>', re.S)
    items = re.findall(pattern, content)
    for item in items:
        print item[0], item[1], item[2], item[3], item[4]

except urllib2.URLError as e:
    if hasattr(e,"code"):
        print e.code
    if hasattr(e,'reason'):
        print e.reason